# -*- coding: UTF-8 -*-
"""
@Project , trainee
@File    , clean_duplicate_comments.py
@IDE     , PyCharm
@Author  , 2607750505@qq.com
@Date    , 2025/6/22 18:36
"""
# -*- coding: UTF-8 -*-
"""Remove duplicate rows from the comments table, keeping the highest-id row per (movie_id, commentContent)."""
import pymysql
from tools.getDataBase import get_conn


def clean_duplicate_comments():
    """Delete duplicate rows from the ``comments`` table.

    Rows count as duplicates when they share the same ``movie_id`` and
    ``commentContent``. Within each such group only the row with the largest
    ``id`` is kept; all other rows are deleted.

    The delete is committed on success. If a ``pymysql.MySQLError`` is raised,
    the error is printed, the transaction is rolled back, and the exception is
    not propagated. The cursor and connection obtained from ``get_conn()`` are
    both closed before the function returns.

    Returns:
        None. Progress and the number of deleted rows are reported via
        ``print``.
    """
    conn, cursor = get_conn()

    try:
        print("开始清理重复评论数据...")

        # Keep only MAX(id) per (movie_id, commentContent) group. The inner
        # SELECT is wrapped in a derived table (AS temp) because MySQL refuses
        # a DELETE whose subquery reads directly from the table being modified
        # (error 1093).
        delete_sql = """
            DELETE FROM comments
            WHERE id NOT IN (
                SELECT keep_id FROM (
                    SELECT MAX(id) AS keep_id
                    FROM comments
                    GROUP BY movie_id, commentContent
                ) AS temp
            )
        """

        cursor.execute(delete_sql)
        # rowcount must be read before commit/close to report affected rows.
        deleted_count = cursor.rowcount
        conn.commit()

        print(f"成功删除 {deleted_count} 条重复评论记录")

    except pymysql.MySQLError as e:
        print(f"数据库操作出错: {e}")
        conn.rollback()
    finally:
        # Release the cursor as well as the connection; the nested
        # try/finally guarantees the connection is closed even if closing
        # the cursor raises.
        try:
            cursor.close()
        finally:
            conn.close()


# Script entry point: run the cleanup only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    clean_duplicate_comments()